
/*

NOTES

Replication do-file for publication in World Development for 
		Resource rents, coercion, and local development: 
			Evidence from post-apartheid South Africa

					Paulo Bastos 	(World Bank)
					Nicolas Bottan 	(Cornell University)

This do-file generates Tables 1 to 3 and A1.
					


Data sources by Table:

Table 1:
- 1996 OHS (https://microdata.worldbank.org/index.php/catalog/960)
- 2011 LFS (https://microdata.worldbank.org/index.php/catalog/1302)

Table 2:
- 1996 Census (https://microdata.worldbank.org/index.php/catalog/915)

Table 3 and A1:
- PALMS (https://www.datafirst.uct.ac.za/dataportal/index.php/catalog/434)

*/


* Set WD
cd ""

* Define paths
global original "data/original/"
global arm "data/"
global results "results/"






* Table 1: Unionization shares by industry

*** Prepare House data
use ${original}ohs1996/OHS_1996_House_v1.1.dta, clear

* Rename
rename typedwel typedwell
rename mainroof roof
rename mainwall walls
rename dwellown ownership
rename nofrooms nrrooms
rename mainwate watersource
rename lighting energylightsource
rename rubbishr refusedisposal
rename strlight streetlight
rename eeindwel toilet
rename epersons hhsize
* Generate/Recode variables
foreach var in typedwell roof walls watersource energylightsource refusedisposal {
recode `var' 0=.
}
recode ownership (0=.) (2=0)
recode streetlight (0=.) (2=0)
recode toilet (.=2)
recode toilet (0=.) (2=0)

keep mdnumber eanumber vpnumber hhsize typedwell roof walls watersource energylightsource refusedisposal ownership streetlight toilet

sort mdnumber eanumber vpnumber
tempfile house
save "`house'", replace

*** Prepare Worker data
use ${original}ohs1996/OHS_1996_Worker_v1.1.dta, clear

* Keep target population
keep if age>=15 & age<=65

* Rename variables
rename actemplo industrydet
rename ejobdesc occupdet
rename eworkwho mainwork
rename strtempy yearjobstart
rename strtempm monthjobstart
rename memunion union
rename eeincome salarycat
rename tothours hrsworkedusually
rename newpwgt1 weight
rename edidwork workpast7days

rename motheraa motheralive
rename fatheraa fatheralive
rename maritals maritalstat
rename bithmdno districtofbirth
rename bithcntr countryofbirth
rename nevrmove nevermove
rename higheted educlevel
rename eegender gender
rename iipmonth illnesspmonth


* Generate/recode variables
recode motheralive (0 3=.)
recode motheralive (2=0)
recode fatheralive (0 3=.)
recode fatheralive (2=0)
recode maritalstat 0=.
recode educlevel 99=.
recode illnesspmonth 0=.

recode mainwork 0=.
recode union 0=.
recode union 2=0
recode workpast7days 0=.

destring salarycat, replace
recode salarycat 0=.

gen industry=substr(industrydet,1,1)
destring industry, replace
label define ind 0 "Private households, etc" 1 "Agriculture" 2 "Mining" 3 "Manufacturing" 4 "Utilities Supply" 5 "Construction" 6 "Wholesale" 7 "Transport and communication" 8 "Financial" 9 "Social services"
label values industry ind
destring industrydet, replace

gen occupation=substr(occupdet,1,1)
destring occupation, replace
label define occ 0 "Unspecified" 1 "Legislators, senior officials, managers" 2 "Professionals" 3 "TECHNICIANS AND ASSOCIATE PROFESSIONALS" 4 "CLERKS" 5 "SERVICE WORKERS AND SHOP AND MARKET SALES WORKERS" 6 "SKILLED AGRICULTURAL AND FISHERY WORKERS" 7 "CRAFT AND RELATED TRADES WORKERS" 8 "PLANT AND MACHINE OPERATORS AND ASSEMBLERS" 9 "ELEMENTARY OCCUPATIONS"
label values occupation occ
destring occupdet, replace
destring hrsworkedusually, replace

replace yearjobstart="19"+yearjobstart
destring yearjobstart, replace
gen yrexperience=1996-yearjobstart

gen male=gender==1

* Generate midSalary variable
gen salary=.
replace salary =0 if salarycat==1
replace salary =1+(2400-1)/2 if salarycat==2
replace salary =2401+(6000-2401)/2 if salarycat==3
replace salary =6001+(12000-6001)/2 if salarycat==4
replace salary =12001+(18000-12001)/2 if salarycat==5
replace salary =18001+(30000-18001)/2 if salarycat==6
replace salary =30001+(42000-30001)/2 if salarycat==7
replace salary =42001+(54000-42001)/2 if salarycat==8
replace salary =54001+(72000-54001)/2 if salarycat==9
replace salary =72001+(96000-72001)/2 if salarycat==10
replace salary =96001+(132000-96001)/2 if salarycat==11
replace salary =132001+(192000-132001)/2 if salarycat==12
replace salary =192001+(360000-192001)/2 if salarycat==13
replace salary =360001 if salarycat==14
* Deflate by CPI: Jan 1996=.372
replace salary=salary/0.372
gen lsalary=log(salary)


tostring distr, gen(mdnumber)
tostring ea, gen(eanumber) format(%04.0f)
tostring visp, gen(vpnumber) format(%03.0f)

sort mdnumber eanumber vpnumber
merge mdnumber eanumber vpnumber using "`house'"
keep if _mer==3
drop _mer

gen year=1996

save ${arm}ohs1996, replace

use ${arm}ohs1996, clear

* Output for Table 1 col. 1
table industry [aw=weight], c(mean union)






use ${original}lfs2011_1/QLFS_2011_1_Worker_v1.dta, clear


* Rename Variables
rename Q15POPULATION popgroup
rename Q13GENDER gender
rename Q16MARITALSTATUS maritalstat
rename Q17EDUCATION educlevel


* Keep target population
keep if q14age>=15 & q14age<=65

* Rename variables
rename q14age age
rename Q312EVERWRK everworked
rename indus industry
rename occup occupation
rename Previndus previndustry
rename PrevOccup prevoccupation
rename status empstatus1
rename Q43INDUSTRY industrydet
rename Q316PREVINDUSTRY previndustrydet
rename Q45WRK4WHOM mainwork
rename Q44YEARSTART yearjobstart
rename Q44MONTHSTART monthjobstart
rename Q411CONTRACTTYPE joblength
rename Q412BMEMUNION union
rename Q416NRWORKERS nremployees
rename Q46PENSION pension
rename Q418HRSWRK hrsworkedusually
rename Q419TOTALHRS hrsworkedlastwk


* Recode
recode union 0 3=.
recode union 2=0


gen year=2011
save ${arm}lfs2011, replace

use ${arm}lfs2011, clear

* Output for Table 1 col. 1
table industry [aw=weight], c(mean union) format(%09.0g)
















* Table 2: Initial wage differentials, individual level data, 1996

** 1996 Census

* Prepare Household Data
use ${original}census1996/SA_Census_1996_Hhold_v1.2_w_hhid.dta, clear

keep hhid dwelling rooms owend fuel* water refuse toilet telephon hhsize peshhwei 

recode rooms 999 = .
recode owend (9 = .) (2 = 0)

sort hhid
tempfile hh
save "`hh'", replace



* Prepare Person Data
use ${original}census1996/SA_Census_1996_Person_v1.2_hhid.dta, clear

* Recode/Generate variables
gen male=sex==1
recode race 9=.

gen yrlyincome=.
replace yrlyincome=0 if income==1
replace yrlyincome=1206 if income==2
replace yrlyincome=4206 if income==3
replace yrlyincome=9006 if income==4
replace yrlyincome=15006 if income==5
replace yrlyincome=24006 if income==6
replace yrlyincome=36006 if income==7
replace yrlyincome=48006 if income==8
replace yrlyincome=63006 if income==9
replace yrlyincome=84006 if income==10
replace yrlyincome=114006 if income==11
replace yrlyincome=162006 if income==12
replace yrlyincome=276006 if income==13
replace yrlyincome=360012 if income==14
replace yrlyincome=yrlyincome/0.372


tostring industr, gen(iaux)
rename industr industrydet
replace iaux=substr(iaux,1,1)
rename iaux industry
destring industry, replace
label define ind 1 "Agriculture, hunting, forestry and fish" 2 "Mining and quarrying" 3 "Manufacturing" 4 "Utilities" 5 "Construction" 6 "Wholesale, retail" 7 "Transport, storage and communication" 8 "Financial intermediation, insurance, re" 9 "Community, social and personal services"
label values industry ind


recode occupat (960 996 999 = .)
tostring occupat, gen(occupation)
replace occupation=substr(occupation,1,1)
destring occupation, replace
label define occ 1 "Legislators" 2 "Professionals" 3 "Technicians" 4 "Clerks" 5 "Service workers" 6 "Skilled agricultural and fishery" 7 "Craft and related trade workers" 8 "Plant and machine operators" 9 "Elementary occupations"
label values occupation occ


rename wmployme workstatus
recode workstatus (7 8 9 = .)

rename deducode educ
recode educ (97 99 = .)

recode marstau (8 9 = .)
rename marstau marstat


gen black=race==1
gen mining=industry==2
gen linc=log(yrlyincome)
gen age2=age^2



keep if workstatus==3 & male==1
keep if age>=18 & age<=50



* Basic
xi: reg linc i.black*mining [w=pespweig], r
outreg2 using ${results}table2.xls, keep(_Iblack_1 mining _IblaXminin_1) excel nocons bracket bdec(3) replace
 
* Individual controls
xi: reg linc i.black*mining age age2 i.educ chilborn i.occupation [w=pespweig], r
outreg2 using ${results}table2.xls, keep(_Iblack_1 mining _IblaXminin_1) excel nocons bracket bdec(3) append

* Controls + Province
xi: reg linc i.black*mining age age2 i.educ chilborn i.occupation i.province [w=pespweig], r
outreg2 using ${results}table2.xls, keep(_Iblack_1 mining _IblaXminin_1) excel nocons bracket bdec(3) append

* Controls + District
xi: areg linc i.black*mining age age2 i.educ chilborn i.occupation [w=pespweig], r abs(district)
outreg2 using ${results}table2.xls, keep(_Iblack_1 mining _IblaXminin_1) excel nocons bracket bdec(3) append












* Table 3: Trends in Real Wages from Survey Data

use "${original}palms/palms.dta", clear


*** Prepare data

** Sample selection

* Males
keep if gender==1

* Work-age (18-50)
keep if age>=18 & age<=50

* Working
keep if empstat1==1

* Not Self-employed
keep if (employer1==1 & year<2000) | (employer==1 & (year>=2000 & year<=2007)) | (employer2==1 & year>2007)

* Drop missings
drop if popgroup==5
drop if marstat>4


gen lwage=log(realearning)
drop if outlier==1

*** Prepare variables

gen black=popgroup==1
gen coloured=popgroup==2
gen indasian=popgroup==3

drop if inlist(jobindcode,11,99)

gen agriculture=jobindcode==1
gen mining=jobindcode==2
gen manufacture=jobindcode==3
gen utilities=jobindcode==4
gen construct=jobindcode==5
gen retail=jobindcode==6
gen transport=jobindcode==7
gen finance=jobindcode==8
gen services=jobindcode==9
gen domserv=jobindcode==10

gen quarter=.
replace quarter=1 if inlist(wave,7, 9, 11, 13, 15, 17, 19, 21, 23, 27, 31, 35, 39, 43, 47, 51)
replace quarter=2 if inlist(wave,24, 28, 32, 36, 40, 44, 48, 52)
replace quarter=3 if inlist(wave,8, 10, 12, 14, 16, 18, 20, 22, 25, 29, 33, 37, 41, 45, 49, 53)
replace quarter=4 if inlist(wave,0, 1, 2, 3, 4, 5, 6, 26, 30, 34, 38, 42, 46, 50, 54)

*** Generate Industry-trends
foreach var in agriculture mining utilities construct retail transport finance services domserv {
	gen T`var'=`var'*year
	gen BT`var'=T`var'*black
	gen B`var'=`var'*black
	}

gen age2=age^2

egen raceoccup=group(popgroup jobocccode)
egen raceoccup2=group(popgroup occupation)
egen raceeduc=group(popgroup yrseduc)
egen racemartial=group(popgroup marstat)

egen provyear=group(province year)

local BT "BTagriculture BTmining BTutilities BTconstruct BTretail BTtransport BTfinance BTservices BTdomserv"
local T "Tagriculture Tmining Tutilities Tconstruct Tretail Ttransport Tfinance Tservices Tdomserv"
local B "Bagriculture Bmining Butilities Bconstruct Bretail Btransport Bfinance Bservices Bdomserv agriculture mining utilities construct retail transport finance services domserv"
local controls "black coloured indasian year i.year i.year#c.coloured i.year#c.black i.year#c.indasian i.raceoccup i.raceeduc i.popgroup#c.age i.popgroup#c.age2 age age2 i.racemartial i.wave"



reg lwage `BT' `T' `B' `controls' [w=ceweight2], r
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) replace bracket nocons
areg lwage `BT' `T' `B' `controls' [w=ceweight2], r abs(provyear)
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons
areg lwage `BT' `T' `B' `controls' if inperson==1 [w=ceweight2], r abs(provyear)
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons
areg lwage `BT' `T' `B' `controls' if publicemp==0 [w=ceweight2], r abs(provyear)
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons
areg lwage `BT' `T' `B' `controls' if jobunion==1 [w=ceweight2], r abs(provyear)
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons
areg lwage `BT' `T' `B' `controls' if year<=2011 [w=ceweight2], r abs(provyear)
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons
areg lwage `BT' `T' `B' `controls', r abs(provyear)
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons
	


*** Not dropping females nor self-employed

use "${original}palms/palms.dta", clear


*** Prepare data

** Sample selection

* Work-age (18-50)
keep if age>=18 & age<=50

* Working
keep if empstat1==1

* Drop missings
drop if popgroup==5
drop if marstat>4


gen lwage=log(realearning)
drop if outlier==1


*** Prepare variables

gen black=popgroup==1
gen coloured=popgroup==2
gen indasian=popgroup==3

drop if inlist(jobindcode,11,99)

gen agriculture=jobindcode==1
gen mining=jobindcode==2
gen manufacture=jobindcode==3
gen utilities=jobindcode==4
gen construct=jobindcode==5
gen retail=jobindcode==6
gen transport=jobindcode==7
gen finance=jobindcode==8
gen services=jobindcode==9
gen domserv=jobindcode==10

gen quarter=.
replace quarter=1 if inlist(wave,7, 9, 11, 13, 15, 17, 19, 21, 23, 27, 31, 35, 39, 43, 47, 51)
replace quarter=2 if inlist(wave,24, 28, 32, 36, 40, 44, 48, 52)
replace quarter=3 if inlist(wave,8, 10, 12, 14, 16, 18, 20, 22, 25, 29, 33, 37, 41, 45, 49, 53)
replace quarter=4 if inlist(wave,0, 1, 2, 3, 4, 5, 6, 26, 30, 34, 38, 42, 46, 50, 54)

*** Generate Industry-trends
foreach var in agriculture mining utilities construct retail transport finance services domserv {
	gen T`var'=`var'*year
	gen BT`var'=T`var'*black
	gen B`var'=`var'*black
	}

gen age2=age^2

egen raceoccup=group(popgroup jobocccode)
egen raceoccup2=group(popgroup occupation)
egen raceeduc=group(popgroup yrseduc)
egen racemartial=group(popgroup marstat)

egen provyear=group(province year)

local BT "BTagriculture BTmining BTutilities BTconstruct BTretail BTtransport BTfinance BTservices BTdomserv"
local T "Tagriculture Tmining Tutilities Tconstruct Tretail Ttransport Tfinance Tservices Tdomserv"
local B "Bagriculture Bmining Butilities Bconstruct Bretail Btransport Bfinance Bservices Bdomserv agriculture mining utilities construct retail transport finance services domserv"
local controls1 "i.popgroup#c.age i.popgroup#c.age2 age age2 i.year#c.coloured"
local controls2 "i.gender i.empstat1 i.empstat1#i.year i.empstat1#i.gender black coloured indasian year i.year i.gender#i.raceeduc  i.year#i.gender i.year#c.black i.year#c.indasian i.raceoccup i.gender#i.raceoccup i.raceoccup#i.empstat1 i.raceeduc i.racemartial i.wave" 

reghdfe lwage `BT' `T' `B' `controls1' [w=ceweight2], vce(r) a(provyear `controls2')
	outreg2 using ${results}table3.xls, keep(`BT' `T' black) excel bdec(4) append bracket nocons

	
	
	
	
	
	


* Table A1: Changes in real wages by race and sector, survey data, 1994-2014

use ${original}palms/palms.dta, clear


*** Prepare data

** Sample selection

* Males
keep if gender==1

* Work-age (18-50)
keep if age>=18 & age<=50

* Working
keep if empstat1==1

* Not Self-employed
keep if (employer1==1 & year<2000) | (employer==1 & (year>=2000 & year<=2007)) | (employer2==1 & year>2007)

* Drop missings
drop if popgroup==5
drop if marstat>4


gen lwage=log(realearning)
drop if outlier==1


*** Prepare variables

gen black=popgroup==1
gen coloured=popgroup==2
gen indasian=popgroup==3

drop if inlist(jobindcode,11,99)

gen agriculture=jobindcode==1
gen mining=jobindcode==2
gen manufacture=jobindcode==3
gen utilities=jobindcode==4
gen construct=jobindcode==5
gen retail=jobindcode==6
gen transport=jobindcode==7
gen finance=jobindcode==8
gen services=jobindcode==9
gen domserv=jobindcode==10

gen quarter=.
replace quarter=1 if inlist(wave,7, 9, 11, 13, 15, 17, 19, 21, 23, 27, 31, 35, 39, 43, 47, 51)
replace quarter=2 if inlist(wave,24, 28, 32, 36, 40, 44, 48, 52)
replace quarter=3 if inlist(wave,8, 10, 12, 14, 16, 18, 20, 22, 25, 29, 33, 37, 41, 45, 49, 53)
replace quarter=4 if inlist(wave,0, 1, 2, 3, 4, 5, 6, 26, 30, 34, 38, 42, 46, 50, 54)

*** Generate Industry-trends
foreach var in agriculture mining utilities construct retail transport finance services domserv {
	gen T`var'=`var'*year
	gen BT`var'=T`var'*black
	gen B`var'=`var'*black
	}

gen age2=age^2

egen raceoccup=group(popgroup jobocccode)
egen raceoccup2=group(popgroup occupation)
egen raceeduc=group(popgroup yrseduc)
egen racemartial=group(popgroup marstat)

egen provyear=group(province year)

keep if year==1994 | year==2014

gen t=year==2014

foreach var in agriculture mining utilities construct retail transport finance services domserv black {
	gen t_`var'=`var'*t
	label var t_`var' "After * `var'"
	}
label var t "After (2014)"

reg lwage t black t_black [w=ceweight2], r
outreg2 using ${results}tableA1.xls, excel bdec(4) replace bracket nocons label

reg lwage t agriculture mining utilities construct retail transport finance services domserv t_agriculture - t_domserv [w=ceweight2], r
outreg2 using ${results}tableA1.xls, excel bdec(4) append bracket nocons label
	
reg lwage t agriculture mining utilities construct retail transport finance services domserv black t_* [w=ceweight2], r
outreg2 using ${results}tableA1.xls, excel bdec(4) append bracket nocons label
